##########################################################
# John Henderson and Alex Theodoridis
# Replication Data for: "Seeing Spots", 
#  Forthcoming in Political Behavior, August 20, 2017
# 
##########################################################
#
#  covar_data_vagov.R
#  -- file produces regression covariates from va_data
#
##########################################################
    
# Remove every (non-hidden) object from the current environment before loading data.
# NOTE(review): this also wipes anything the caller had defined when this file is source()'d.
rm(list=ls())
# Run the upstream data-preparation script. The objects used below but not defined in
# this file (va_data, pid_lean, cnty_returns, zip_outs) are presumably created there -- confirm.
# NOTE(review): the path is hard-coded to one user's Dropbox folder.
source('~/Dropbox/Seeing_Spots/replication/pre_data_vagov.R')

#ymat=cbind(video_skipped, replay, share, getlink, all_y,time_watched, total_time)
  
# goal is to outline characteristics of skippers v. watchers v. other info seekers/avoiders
 # - given characteristics   
 # - given experiment + characteristics

# ---- Covariates: party identification and ideology ----
# pids: copy of pid_lean in which the code -1 is replaced by 2.
pids <- replace(pid_lean, which(pid_lean == -1), 2)
# partisan: absolute value of pid_lean.
partisan <- abs(pid_lean)

# libcon (lib-con): numeric code of ideo5 shifted down by 3; any shifted value
# above 2 is then set to 0.
# NOTE(review): presumably this centres a 5-point scale and sends a "not sure"
# style category to the midpoint -- confirm against the ideo5 codebook.
libcon <- as.numeric(va_data$ideo5) - 3
libcon[libcon > 2] <- 0

# newsint is used as loaded, without recoding (#newsint=newsint).

# women: numeric code of gender minus 1 (author's note: men 0, women 1).
women <- as.numeric(va_data$gender) - 1

# Education: educf keeps the column as stored, educ is its numeric code.
educf <- va_data$educ
educ <- as.numeric(educf)

# age: 2012 minus the numeric value of birthyr.
# NOTE(review): if birthyr is stored as a factor, as.numeric() returns level
# codes rather than years -- confirm the column type.
age <- 2012 - as.numeric(va_data$birthyr)

# Race: 0/1 indicators for three labels of race_pre (NA stays NA).
racef <- va_data$race_pre
white <- as.numeric(racef == "White")
black <- as.numeric(racef == "Black")
hisp <- as.numeric(racef == "Hispanic")

#va_data$hispanic
# Employment: 0/1 indicators for full-time work and for unemployment.
employf <- va_data$employ_pre
employed <- as.numeric(employf == "Full-time")
unemployed <- as.numeric(employf == "Unemployed")

# Marital status: 1 when the response is exactly "Married".
marriedf <- va_data$marstat_pre
married <- as.numeric(marriedf == "Married")

# Family income: incomef keeps the column as stored, income is its numeric code.
incomef <- va_data$faminc
income <- as.numeric(incomef)
# Codes above 18 are overwritten with values drawn at random (without
# replacement, the sample() default) from the codes that are below 18.
# NOTE(review): no seed is set in this file, so the imputed values change from
# run to run unless one is set upstream; sample() errors if there are more codes
# above 18 than below it; a code of exactly 18 is neither replaced nor part of
# the donor pool -- confirm all three are intended.
income <- replace(
  income,
  which(income > 18),
  sample(income[which(income < 18)], size = sum(income > 18, na.rm = TRUE))
)

# reg: 1 when votereg is "Yes".
reg <- as.numeric(va_data$votereg == "Yes")
# vote: 1 when the numeric code of Vote2012_pre is below 4.
vote <- as.numeric(as.numeric(va_data$Vote2012_pre) < 4)
# preschoice: 1 when Vote2012_pre is the Obama response.
# NOTE(review): the original comment called this the "2008 vote choice", but the
# column read here is Vote2012_pre -- confirm which election is meant.
preschoice <- as.numeric(va_data$Vote2012_pre == "Barack Obama (Democratic)")

# uncertain: 1 when govhypo_pre_pre is the "not sure" response; the label is
# compared with its trailing space included.
uncertain <- as.numeric(va_data$govhypo_pre_pre == "I'm not sure ")

# Vectorised parity check: TRUE where x leaves no remainder on division by 2,
# FALSE otherwise; NA inputs give NA.
is.even <- function(x) {
  remainder <- x %% 2
  remainder == 0
}
# Pair up consecutive rows of cnty_returns: columns 1, 2 and 4 of each
# odd-numbered row are placed beside columns 2 and 4 of the following
# even-numbered row. seq_len() is used so an empty table produces no row
# indices (1:nrow() would yield c(1, 0) and select a spurious row).
# NOTE(review): assumes cnty_returns has an even number of rows, two per
# locality -- confirm against the upstream script.
countyFinal <- cbind(
  cnty_returns[!is.even(seq_len(nrow(cnty_returns))), c(1, 2, 4)],
  cnty_returns[is.even(seq_len(nrow(cnty_returns))), c(2, 4)]
)

# Names (column 1) of the rows whose paired values in columns 3 and 5 differ by
# less than 0.1; which() drops rows where the comparison is NA.
counties <- as.character(
  countyFinal[which(abs(countyFinal[, 3] - countyFinal[, 5]) < .1), 1]
)

# Normalise locality names. Arguments are passed positionally
# (pattern, replacement, x) instead of relying on partial matching of
# `replace` to `replacement`.
# NOTE(review): stripping " City" also alters names that contain it mid-string
# (e.g. a county whose name includes " City ") -- confirm this is intended.
zip_outs <- gsub(" City", "", zip_outs)
counties <- gsub(" City", "", counties)
# Delete any run of two or more spaces from the county names.
counties <- gsub("[ ]{2,}", "", counties)

# Attach a locality name to every respondent through the zip code.
# zipped: respondent zip codes as numbers; zips: the distinct values, in order
# of first appearance.
zipped <- as.numeric(as.character(va_data$zipcode2_pre))
zips <- unique(zipped)
# counties_pre[i] is zip_outs[j], where j is the position of respondent i's zip
# in zips. Missing zips are declared incomparable so they stay NA.
# NOTE(review): this relies on zip_outs being aligned element-for-element with
# unique(zipped); zip_outs is built outside this file -- confirm.
counties_pre <- array(
  zip_outs[match(zipped, zips, incomparables = c(NA, NaN))],
  length(va_data[[1]])
)

# battleground: 1 for respondents whose locality name approximately matches
# (agrep, default tolerance) any name in counties, 0 otherwise.
# seq_along() makes the loop a no-op when counties is empty; 1:length(counties)
# would instead iterate over c(1, 0) and call agrep() with a missing/empty
# pattern.
# NOTE(review): agrep() does approximate substring matching, so a short name can
# also hit longer, unrelated locality names -- confirm the matches are as intended.
battleground <- array(0, length(va_data[[1]]))
for (j in seq_along(counties)) {
  ix <- agrep(counties[j], counties_pre)
  battleground[ix] <- 1
}

# churchattend: reverse-coded pew_churatd. Each numeric code is subtracted from
# the absolute value of the largest code, so (for positive codes) the highest
# original code becomes 0 and lower codes become larger values.
# na.rm = TRUE keeps one missing response from turning the offset, and with it
# the whole vector, into NA; missing responses themselves stay NA.
churchattend <- abs(min(-as.numeric(va_data$pew_churatd), na.rm = TRUE)) -
  as.numeric(va_data$pew_churatd)

# mdsscale is simply a second name for the libcon scale.
mdsscale <- libcon

# Party-ID strength.
# pid7: numeric code of pid7zero shifted down by 4; any shifted value above 3 is
# then set to 0.
pid7 <- as.numeric(va_data$pid7zero) - 4
pid7[pid7 > 3] <- 0

# Strength is the absolute value of pid7. The variable name keeps its original
# spelling ("strenth") because other scripts may refer to it.
pid_strenth <- abs(pid7)
    
#END covar_data_vagov.R